{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Untitled2.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "PqPsDXQ2n-lz",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import pandas as pd\n",
        "import numpy as np"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4IfW-HMNoRDN",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Read the training and test tables from the working directory.\n",
        "train, test = (\n",
        "    pd.read_csv(csv_name) for csv_name in (\"NEW_train.csv\", \"NEW_test.csv\")\n",
        ")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Yz_FSY_docCL",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Give every test row a Downloads value of 0; a later cell filters on this\n",
        "# value to separate the test rows from the training rows again.\n",
        "test = test.assign(Downloads=0)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qdx5_A7moYCq",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Stack train on top of test so the same preprocessing is applied to both.\n",
        "df = pd.concat(objs=[train, test], axis=0)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9oZJ26HtoosU",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Remove the \"Unnamed: 0\" column from the combined frame.\n",
        "df = df.drop(\"Unnamed: 0\", axis=1)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jEQvOC2OpGt5",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Rating scaled by 100 and divided by the review count.\n",
        "# Bracket assignment is used on purpose: attribute-style assignment\n",
        "# (df.Rate_Per_100 = ...) only updates a column that already exists and\n",
        "# cannot create a new one, whereas this form works in both cases.\n",
        "df[\"Rate_Per_100\"] = (df[\"Rating\"] * 100) / df[\"Reviews\"]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "stAyVOhgpfqG",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Remove the OS_Version_Required column from the combined frame.\n",
        "df = df.drop(\"OS_Version_Required\", axis=1)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BPlOqXWPpueA",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Rows labelled \"5,000,000,000+\", which the next cell appends to df a second time.\n",
        "# They are selected from df rather than from the raw train frame: train still\n",
        "# carries the \"Unnamed: 0\" and OS_Version_Required columns that were dropped\n",
        "# from df, so concatenating rows taken from train would bring those columns back.\n",
        "a = df[df[\"Downloads\"] == \"5,000,000,000+\"]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-tMq4Qdbp1YL",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Append the rows held in `a` to the bottom of the combined frame.\n",
        "df = pd.concat(objs=[df, a], axis=0)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cmrp6zcXolpG",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Show the first three rows of the combined frame.\n",
        "df.head(3)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-DuW71AKoxv_",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from sklearn.preprocessing import LabelEncoder\n",
        "\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VcTs_oq2rDao",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# One encoder instance, re-fitted by each of the following encoding cells.\n",
        "le = LabelEncoder()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "McjTT3y5q_Bk",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Replace Offered_By with the integer codes produced by the label encoder.\n",
        "df[\"Offered_By\"] = le.fit_transform(df[\"Offered_By\"])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "OZjulPxurHK6",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Replace Category with the integer codes produced by the label encoder.\n",
        "df[\"Category\"] = le.fit_transform(df[\"Category\"])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ugmjBqGArOQi",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Replace Content_Rating with the integer codes produced by the label encoder.\n",
        "df[\"Content_Rating\"] = le.fit_transform(df[\"Content_Rating\"])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Au1zuqHt34Yo",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Replace Cat_Con with the integer codes produced by the label encoder.\n",
        "df[\"Cat_Con\"] = le.fit_transform(df[\"Cat_Con\"])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "1VF3NuWvrVgx",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Display the dtype of every column after encoding.\n",
        "df.dtypes"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "huglpfeDrz_7",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Split the combined frame back apart: test rows are the ones whose Downloads\n",
        "# value is 0, everything else is training data.\n",
        "# Explicit copies are taken because these frames are modified in place by\n",
        "# later cells; without .copy() pandas reports SettingWithCopyWarning there.\n",
        "train = df.loc[df[\"Downloads\"] != 0].copy()\n",
        "test = df.loc[df[\"Downloads\"] == 0].copy()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EAMTWUqEtTfs",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Show the first two rows of the test split.\n",
        "test.head(2)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "tFy67vr2snbF",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Explicit %pip magic: installs into the environment of the running kernel and\n",
        "# does not depend on IPython's automagic resolving a bare `pip` line.\n",
        "%pip install catboost"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9nkxDn1PtDsV",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Target is the Downloads column; features are every other training column.\n",
        "y = train[\"Downloads\"]\n",
        "x = train.drop(\"Downloads\", axis=1)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "7f6M0QmFsrta",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from lightgbm import LGBMClassifier\n",
        "\n",
        "# LightGBM names its tree-depth limit `max_depth`; `depth` is not a recognised\n",
        "# LightGBM parameter, so the intended depth cap of 4 was not being applied.\n",
        "lgb = LGBMClassifier(\n",
        "    n_estimators=1500,\n",
        "    learning_rate=0.01,\n",
        "    max_depth=4,\n",
        "    reg_alpha=1,\n",
        ")\n",
        "lgb.fit(x, y)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cjNwwz6juJKP",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# lgb was fitted on x, which excludes Downloads, while test still carries that\n",
        "# column at this point; drop it for the call so the feature sets match.\n",
        "# errors=\"ignore\" keeps the cell working if Downloads has already been removed.\n",
        "pred_lgb = lgb.predict_proba(test.drop(columns=[\"Downloads\"], errors=\"ignore\"))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "L0Z2i_B6_Kjk",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Remove the Downloads column from the test frame.\n",
        "# Rebinding instead of inplace=True avoids SettingWithCopyWarning on a frame\n",
        "# obtained by boolean selection, and errors=\"ignore\" lets the cell be re-run\n",
        "# without a KeyError once the column is gone.\n",
        "test = test.drop(columns=[\"Downloads\"], errors=\"ignore\")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6O52hg5MuQpx",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Wrap the LightGBM probability output in a DataFrame.\n",
        "pred_lgb = pd.DataFrame(data=pred_lgb)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gJ5LuMI3uOeA",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Write the LightGBM probabilities to disk.\n",
        "pred_lgb.to_csv(path_or_buf=\"SUJIT2.csv\")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "oaWT8EQ95Nmv",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Remove the three price-interaction columns from the training features.\n",
        "x = x.drop(\n",
        "    [\"Size_Price\", \"Price_rating\", \"Review_Price\"],\n",
        "    axis=1,\n",
        ")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "maoXBZBjuo_Q",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Show the first two rows of the training features.\n",
        "x.head(2)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_kxvMuMEt9qj",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from catboost import CatBoostClassifier\n",
        "\n",
        "# 500 boosting iterations, depth-6 trees, learning rate 0.1.\n",
        "cat = CatBoostClassifier(\n",
        "    iterations=500,\n",
        "    depth=6,\n",
        "    learning_rate=0.1,\n",
        ")\n",
        "# Columns at positions 0, 1 and 6 of x are passed as categorical features.\n",
        "cat.fit(\n",
        "    x,\n",
        "    y,\n",
        "    cat_features=[0, 1, 6],\n",
        "    early_stopping_rounds=50,\n",
        ")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "DP_vE2_Jybi1",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Size_Price, Price_rating and Review_Price were dropped from x before the\n",
        "# CatBoost fit but are still present in test. Select exactly the columns the\n",
        "# model was trained on, in the same order, so the features line up.\n",
        "cat_pred = cat.predict_proba(test[x.columns])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "feqmMHQqyhah",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Wrap the CatBoost probability output in a DataFrame.\n",
        "cat_pred = pd.DataFrame(data=cat_pred)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9A3pnj25GUVN",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# `pred` is not assigned by any earlier cell, so wrapping it raised NameError\n",
        "# on a fresh kernel. It is now built from the CatBoost probabilities.\n",
        "# NOTE(review): assumes cat_pred is what should be written to sub2.csv - confirm.\n",
        "pred = pd.DataFrame(cat_pred)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9JYcraBLG897",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Pass cat_pred through the DataFrame constructor again.\n",
        "cat_pred = pd.DataFrame(data=cat_pred)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_gwOV9r8yk0l",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Write the final predictions to disk.\n",
        "pred.to_csv(path_or_buf=\"sub2.csv\")"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}